summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: liamwhite <liamwhite@users.noreply.github.com> 2023-12-16 17:47:21 +0100
committer: GitHub <noreply@github.com> 2023-12-16 17:47:21 +0100
commit: 3bc7575c47f2460b249702c2844687b6c0106cc8 (patch)
tree: 540ca2ced701b86da44fbd674dedbdcf81b83117
parent: Merge pull request #12358 from liamwhite/optimized-alloc (diff)
parent: video_core: lock interval map update (diff)
download: yuzu-3bc7575c47f2460b249702c2844687b6c0106cc8.tar
yuzu-3bc7575c47f2460b249702c2844687b6c0106cc8.tar.gz
yuzu-3bc7575c47f2460b249702c2844687b6c0106cc8.tar.bz2
yuzu-3bc7575c47f2460b249702c2844687b6c0106cc8.tar.lz
yuzu-3bc7575c47f2460b249702c2844687b6c0106cc8.tar.xz
yuzu-3bc7575c47f2460b249702c2844687b6c0106cc8.tar.zst
yuzu-3bc7575c47f2460b249702c2844687b6c0106cc8.zip
-rw-r--r-- src/tests/video_core/memory_tracker.cpp 6
-rw-r--r-- src/video_core/buffer_cache/word_manager.h 2
-rw-r--r-- src/video_core/rasterizer_accelerated.cpp 99
-rw-r--r-- src/video_core/rasterizer_accelerated.h 29
-rw-r--r-- src/video_core/rasterizer_interface.h 2
-rw-r--r-- src/video_core/shader_cache.cpp 4
-rw-r--r-- src/video_core/texture_cache/texture_cache.h 10
7 files changed, 73 insertions, 79 deletions
diff --git a/src/tests/video_core/memory_tracker.cpp b/src/tests/video_core/memory_tracker.cpp
index 618793668..2dbff21af 100644
--- a/src/tests/video_core/memory_tracker.cpp
+++ b/src/tests/video_core/memory_tracker.cpp
@@ -23,13 +23,13 @@ constexpr VAddr c = 16 * HIGH_PAGE_SIZE;
class RasterizerInterface {
public:
- void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
+ void UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) {
const u64 page_start{addr >> Core::Memory::YUZU_PAGEBITS};
const u64 page_end{(addr + size + Core::Memory::YUZU_PAGESIZE - 1) >>
Core::Memory::YUZU_PAGEBITS};
for (u64 page = page_start; page < page_end; ++page) {
int& value = page_table[page];
- value += delta;
+ value += (cache ? 1 : -1);
if (value < 0) {
throw std::logic_error{"negative page"};
}
@@ -546,4 +546,4 @@ TEST_CASE("MemoryTracker: Cached write downloads") {
REQUIRE(!memory_track->IsRegionGpuModified(c + PAGE, PAGE));
memory_track->MarkRegionAsCpuModified(c, WORD);
REQUIRE(rasterizer.Count() == 0);
-} \ No newline at end of file
+}
diff --git a/src/video_core/buffer_cache/word_manager.h b/src/video_core/buffer_cache/word_manager.h
index a336bde41..95b752055 100644
--- a/src/video_core/buffer_cache/word_manager.h
+++ b/src/video_core/buffer_cache/word_manager.h
@@ -473,7 +473,7 @@ private:
VAddr addr = cpu_addr + word_index * BYTES_PER_WORD;
IteratePages(changed_bits, [&](size_t offset, size_t size) {
rasterizer->UpdatePagesCachedCount(addr + offset * BYTES_PER_PAGE,
- size * BYTES_PER_PAGE, add_to_rasterizer ? 1 : -1);
+ size * BYTES_PER_PAGE, add_to_rasterizer);
});
}
diff --git a/src/video_core/rasterizer_accelerated.cpp b/src/video_core/rasterizer_accelerated.cpp
index f200a650f..3c9477f6e 100644
--- a/src/video_core/rasterizer_accelerated.cpp
+++ b/src/video_core/rasterizer_accelerated.cpp
@@ -3,6 +3,7 @@
#include <atomic>
+#include "common/alignment.h"
#include "common/assert.h"
#include "common/common_types.h"
#include "common/div_ceil.h"
@@ -11,61 +12,65 @@
namespace VideoCore {
+static constexpr u16 IdentityValue = 1;
+
using namespace Core::Memory;
-RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_)
- : cached_pages(std::make_unique<CachedPages>()), cpu_memory{cpu_memory_} {}
+RasterizerAccelerated::RasterizerAccelerated(Memory& cpu_memory_) : map{}, cpu_memory{cpu_memory_} {
+ // We are tracking CPU memory, which cannot map more than 39 bits.
+ const VAddr start_address = 0;
+ const VAddr end_address = (1ULL << 39);
+ const IntervalType address_space_interval(start_address, end_address);
+ const auto value = std::make_pair(address_space_interval, IdentityValue);
+
+ map.add(value);
+}
RasterizerAccelerated::~RasterizerAccelerated() = default;
-void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {
- u64 uncache_begin = 0;
- u64 cache_begin = 0;
- u64 uncache_bytes = 0;
- u64 cache_bytes = 0;
-
- std::atomic_thread_fence(std::memory_order_acquire);
- const u64 page_end = Common::DivCeil(addr + size, YUZU_PAGESIZE);
- for (u64 page = addr >> YUZU_PAGEBITS; page != page_end; ++page) {
- std::atomic_uint16_t& count = cached_pages->at(page >> 2).Count(page);
-
- if (delta > 0) {
- ASSERT_MSG(count.load(std::memory_order::relaxed) < UINT16_MAX, "Count may overflow!");
- } else if (delta < 0) {
- ASSERT_MSG(count.load(std::memory_order::relaxed) > 0, "Count may underflow!");
- } else {
- ASSERT_MSG(false, "Delta must be non-zero!");
- }
+void RasterizerAccelerated::UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) {
+ std::scoped_lock lk{map_lock};
- // Adds or subtracts 1, as count is a unsigned 8-bit value
- count.fetch_add(static_cast<u16>(delta), std::memory_order_release);
-
- // Assume delta is either -1 or 1
- if (count.load(std::memory_order::relaxed) == 0) {
- if (uncache_bytes == 0) {
- uncache_begin = page;
- }
- uncache_bytes += YUZU_PAGESIZE;
- } else if (uncache_bytes > 0) {
- cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes,
- false);
- uncache_bytes = 0;
- }
- if (count.load(std::memory_order::relaxed) == 1 && delta > 0) {
- if (cache_bytes == 0) {
- cache_begin = page;
- }
- cache_bytes += YUZU_PAGESIZE;
- } else if (cache_bytes > 0) {
- cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true);
- cache_bytes = 0;
+ // Align sizes.
+ addr = Common::AlignDown(addr, YUZU_PAGESIZE);
+ size = Common::AlignUp(size, YUZU_PAGESIZE);
+
+ // Declare the overall interval we are going to operate on.
+ const VAddr start_address = addr;
+ const VAddr end_address = addr + size;
+ const IntervalType modification_range(start_address, end_address);
+
+ // Find the boundaries of where to iterate.
+ const auto lower = map.lower_bound(modification_range);
+ const auto upper = map.upper_bound(modification_range);
+
+ // Iterate over the contained intervals.
+ for (auto it = lower; it != upper; it++) {
+ // Intersect interval range with modification range.
+ const auto current_range = modification_range & it->first;
+
+ // Calculate the address and size to operate over.
+ const auto current_addr = current_range.lower();
+ const auto current_size = current_range.upper() - current_addr;
+
+ // Get the current value of the range.
+ const auto value = it->second;
+
+ if (cache && value == IdentityValue) {
+ // If we are going to cache, and the value is not yet referenced, then cache this range.
+ cpu_memory.RasterizerMarkRegionCached(current_addr, current_size, true);
+ } else if (!cache && value == IdentityValue + 1) {
+ // If we are going to uncache, and this is the last reference, then uncache this range.
+ cpu_memory.RasterizerMarkRegionCached(current_addr, current_size, false);
}
}
- if (uncache_bytes > 0) {
- cpu_memory.RasterizerMarkRegionCached(uncache_begin << YUZU_PAGEBITS, uncache_bytes, false);
- }
- if (cache_bytes > 0) {
- cpu_memory.RasterizerMarkRegionCached(cache_begin << YUZU_PAGEBITS, cache_bytes, true);
+
+ // Update the set.
+ const auto value = std::make_pair(modification_range, IdentityValue);
+ if (cache) {
+ map.add(value);
+ } else {
+ map.subtract(value);
}
}
diff --git a/src/video_core/rasterizer_accelerated.h b/src/video_core/rasterizer_accelerated.h
index e6c0ea87a..f1968f186 100644
--- a/src/video_core/rasterizer_accelerated.h
+++ b/src/video_core/rasterizer_accelerated.h
@@ -3,8 +3,8 @@
#pragma once
-#include <array>
-#include <atomic>
+#include <mutex>
+#include <boost/icl/interval_map.hpp>
#include "common/common_types.h"
#include "video_core/rasterizer_interface.h"
@@ -21,28 +21,17 @@ public:
explicit RasterizerAccelerated(Core::Memory::Memory& cpu_memory_);
~RasterizerAccelerated() override;
- void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) override;
+ void UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) override;
private:
- class CacheEntry final {
- public:
- CacheEntry() = default;
+ using PageIndex = VAddr;
+ using PageReferenceCount = u16;
- std::atomic_uint16_t& Count(std::size_t page) {
- return values[page & 3];
- }
+ using IntervalMap = boost::icl::interval_map<PageIndex, PageReferenceCount>;
+ using IntervalType = IntervalMap::interval_type;
- const std::atomic_uint16_t& Count(std::size_t page) const {
- return values[page & 3];
- }
-
- private:
- std::array<std::atomic_uint16_t, 4> values{};
- };
- static_assert(sizeof(CacheEntry) == 8, "CacheEntry should be 8 bytes!");
-
- using CachedPages = std::array<CacheEntry, 0x2000000>;
- std::unique_ptr<CachedPages> cached_pages;
+ IntervalMap map;
+ std::mutex map_lock;
Core::Memory::Memory& cpu_memory;
};
diff --git a/src/video_core/rasterizer_interface.h b/src/video_core/rasterizer_interface.h
index af1469147..fd42d26b5 100644
--- a/src/video_core/rasterizer_interface.h
+++ b/src/video_core/rasterizer_interface.h
@@ -162,7 +162,7 @@ public:
}
/// Increase/decrease the number of object in pages touching the specified region
- virtual void UpdatePagesCachedCount(VAddr addr, u64 size, int delta) {}
+ virtual void UpdatePagesCachedCount(VAddr addr, u64 size, bool cache) {}
/// Initialize disk cached resources for the game being emulated
virtual void LoadDiskResources(u64 title_id, std::stop_token stop_loading,
diff --git a/src/video_core/shader_cache.cpp b/src/video_core/shader_cache.cpp
index e81cd031b..a109f9cbe 100644
--- a/src/video_core/shader_cache.cpp
+++ b/src/video_core/shader_cache.cpp
@@ -132,7 +132,7 @@ void ShaderCache::Register(std::unique_ptr<ShaderInfo> data, VAddr addr, size_t
storage.push_back(std::move(data));
- rasterizer.UpdatePagesCachedCount(addr, size, 1);
+ rasterizer.UpdatePagesCachedCount(addr, size, true);
}
void ShaderCache::InvalidatePagesInRegion(VAddr addr, size_t size) {
@@ -209,7 +209,7 @@ void ShaderCache::UnmarkMemory(Entry* entry) {
const VAddr addr = entry->addr_start;
const size_t size = entry->addr_end - addr;
- rasterizer.UpdatePagesCachedCount(addr, size, -1);
+ rasterizer.UpdatePagesCachedCount(addr, size, false);
}
void ShaderCache::RemoveShadersFromStorage(std::span<ShaderInfo*> removed_shaders) {
diff --git a/src/video_core/texture_cache/texture_cache.h b/src/video_core/texture_cache/texture_cache.h
index 0d5a1709f..d7941f6a4 100644
--- a/src/video_core/texture_cache/texture_cache.h
+++ b/src/video_core/texture_cache/texture_cache.h
@@ -2080,7 +2080,7 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
ASSERT(False(image.flags & ImageFlagBits::Tracked));
image.flags |= ImageFlagBits::Tracked;
if (False(image.flags & ImageFlagBits::Sparse)) {
- rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, 1);
+ rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, true);
return;
}
if (True(image.flags & ImageFlagBits::Registered)) {
@@ -2091,13 +2091,13 @@ void TextureCache<P>::TrackImage(ImageBase& image, ImageId image_id) {
const auto& map = slot_map_views[map_view_id];
const VAddr cpu_addr = map.cpu_addr;
const std::size_t size = map.size;
- rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+ rasterizer.UpdatePagesCachedCount(cpu_addr, size, true);
}
return;
}
ForEachSparseSegment(image,
[this]([[maybe_unused]] GPUVAddr gpu_addr, VAddr cpu_addr, size_t size) {
- rasterizer.UpdatePagesCachedCount(cpu_addr, size, 1);
+ rasterizer.UpdatePagesCachedCount(cpu_addr, size, true);
});
}
@@ -2106,7 +2106,7 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
ASSERT(True(image.flags & ImageFlagBits::Tracked));
image.flags &= ~ImageFlagBits::Tracked;
if (False(image.flags & ImageFlagBits::Sparse)) {
- rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, -1);
+ rasterizer.UpdatePagesCachedCount(image.cpu_addr, image.guest_size_bytes, false);
return;
}
ASSERT(True(image.flags & ImageFlagBits::Registered));
@@ -2117,7 +2117,7 @@ void TextureCache<P>::UntrackImage(ImageBase& image, ImageId image_id) {
const auto& map = slot_map_views[map_view_id];
const VAddr cpu_addr = map.cpu_addr;
const std::size_t size = map.size;
- rasterizer.UpdatePagesCachedCount(cpu_addr, size, -1);
+ rasterizer.UpdatePagesCachedCount(cpu_addr, size, false);
}
}